Libraries

# if (!require("renv")) install.packages("renv")
# library(renv)
# renv::restore()
library(here)
library(dplyr)
library(readr)
library(arrow)

Read data

# Locate the census archive and a scratch directory for the extracted files.
zip_file <- here("data", "raw", "iter_00_cpv2020_csv.zip")
temp_dir <- here("temp")
dir.create(temp_dir, showWarnings = FALSE)

# Fail early with a clear message; unzip() would only emit a warning and the
# problem would surface later as a confusing "file does not exist" error.
if (!file.exists(zip_file)) {
  stop("Census archive not found: ", zip_file, call. = FALSE)
}

# Paths of the two archive members we need, relative to the archive root.
# Defined once so the extraction list and the read paths cannot drift apart.
data_member <- file.path(
  "iter_00_cpv2020",
  "conjunto_de_datos",
  "conjunto_de_datos_iter_00CSV20.csv"
)
dict_member <- file.path(
  "iter_00_cpv2020",
  "diccionario_datos",
  "diccionario_datos_iter_00CSV20.csv"
)

unzip(zip_file, files = c(data_member, dict_member), exdir = temp_dir)

# temp_dir was already built by here(), so join onto it with file.path()
# instead of passing it through here() a second time.
data_path <- file.path(temp_dir, data_member)
dict_path <- file.path(temp_dir, dict_member)

# The dictionary is read as-is (no header handling); it is cleaned later.
info_dict <- read_csv(dict_path)
New names:Rows: 290 Columns: 10── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): ...1, ...2, ...3, ...4, ...5, ...6
lgl (4): ...7, ...8, ...9, ...10
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the main census table from the extracted CSV, letting readr guess the
# column types.
df <- read_csv(file = data_path)
Rows: 195662 Columns: 286── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (283): ENTIDAD, NOM_ENT, MUN, NOM_MUN, LOC, NOM_LOC, LONGITUD, LATITUD, ALTITUD, POBFEM, POBMAS, P_0A2, P_0A2_F, P_0A2_M, P_3YMAS, P_3YMAS_F, P_3Y...
dbl   (3): POBTOT, VIVTOT, TVIVHAB
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove the scratch directory and everything extracted into it.
unlink(x = temp_dir, recursive = TRUE)

# Save the dictionary, exactly as read, next to the raw archive.
write_csv(
  x = info_dict,
  file = here("data", "raw", "diccionario_datos_iter_00CSV20.csv")
)

Exploration

# First rows of the census table and of the raw dictionary, followed by the
# full column-by-column structure of the census table.
head(df, n = 6L)
head(info_dict, n = 6L)
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ENTIDAD    : chr [1:195662] "00" "00" "00" "01" ...
 $ NOM_ENT    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
 $ MUN        : chr [1:195662] "000" "000" "000" "000" ...
 $ NOM_MUN    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
 $ LOC        : chr [1:195662] "0000" "9998" "9999" "0000" ...
 $ NOM_LOC    : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
 $ LONGITUD   : chr [1:195662] NA NA NA NA ...
 $ LATITUD    : chr [1:195662] NA NA NA NA ...
 $ ALTITUD    : chr [1:195662] NA NA NA NA ...
 $ POBTOT     : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
 $ POBFEM     : chr [1:195662] "64540634" "96869" "61324" "728924" ...
 $ POBMAS     : chr [1:195662] "61473390" "153485" "85801" "696683" ...
 $ P_0A2      : chr [1:195662] "5764054" "10493" "6798" "71864" ...
 $ P_0A2_F    : chr [1:195662] "2848875" "5193" "3407" "35604" ...
 $ P_0A2_M    : chr [1:195662] "2915179" "5300" "3391" "36260" ...
 $ P_3YMAS    : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
 $ P_3YMAS_F  : chr [1:195662] "61554567" "91463" "57628" "692561" ...
 $ P_3YMAS_M  : chr [1:195662] "58422017" "147978" "82129" "659674" ...
 $ P_5YMAS    : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
 $ P_5YMAS_F  : chr [1:195662] "59433559" "87931" "55256" "666713" ...
 $ P_5YMAS_M  : chr [1:195662] "56259714" "144155" "79772" "632956" ...
 $ P_12YMAS   : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
 $ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
 $ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
 $ P_15YMAS   : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
 $ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
 $ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
 $ P_18YMAS   : chr [1:195662] "87492680" "186968" "104612" "960764" ...
 $ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
 $ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
 $ P_3A5      : chr [1:195662] "6462212" "10900" "7028" "78833" ...
 $ P_3A5_F    : chr [1:195662] "3193548" "5270" "3511" "38679" ...
 $ P_3A5_M    : chr [1:195662] "3268664" "5630" "3517" "40154" ...
 $ P_6A11     : chr [1:195662] "12986217" "20793" "13506" "156683" ...
 $ P_6A11_F   : chr [1:195662] "6398755" "10082" "6574" "77289" ...
 $ P_6A11_M   : chr [1:195662] "6587462" "10711" "6932" "79394" ...
 $ P_8A14     : chr [1:195662] "15287375" "24342" "16724" "181905" ...
 $ P_8A14_F   : chr [1:195662] "7531118" "11538" "7679" "89383" ...
 $ P_8A14_M   : chr [1:195662] "7756257" "12804" "9045" "92522" ...
 $ P_12A14    : chr [1:195662] "6542801" "10337" "7693" "77815" ...
 $ P_12A14_F  : chr [1:195662] "3229273" "4767" "3268" "38206" ...
 $ P_12A14_M  : chr [1:195662] "3313528" "5570" "4425" "39609" ...
 $ P_15A17    : chr [1:195662] "6492674" "10443" "6918" "78140" ...
 $ P_15A17_F  : chr [1:195662] "3202134" "4830" "3091" "38298" ...
 $ P_15A17_M  : chr [1:195662] "3290540" "5613" "3827" "39842" ...
 $ P_18A24    : chr [1:195662] "14736111" "27841" "16336" "180847" ...
 $ P_18A24_F  : chr [1:195662] "7398617" "11140" "6760" "90632" ...
 $ P_18A24_M  : chr [1:195662] "7337494" "16701" "9576" "90215" ...
 $ P_15A49_F  : chr [1:195662] "33885546" "47693" "29297" "388917" ...
 $ P_60YMAS   : chr [1:195662] "15142976" "37383" "21277" "145376" ...
 $ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
 $ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
 $ REL_H_M    : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
 $ POB0_14    : chr [1:195662] "31755284" "52523" "35025" "385195" ...
 $ POB15_64   : chr [1:195662] "83663440" "171209" "96250" "941834" ...
 $ POB65_MAS  : chr [1:195662] "10321914" "26202" "15280" "97070" ...
 $ P_0A4      : chr [1:195662] "10047365" "17848" "11527" "124430" ...
 $ P_0A4_F    : chr [1:195662] "4969883" "8725" "5779" "61452" ...
 $ P_0A4_M    : chr [1:195662] "5077482" "9123" "5748" "62978" ...
 $ P_5A9      : chr [1:195662] "10764379" "17380" "11274" "131048" ...
 $ P_5A9_F    : chr [1:195662] "5311288" "8526" "5558" "64689" ...
 $ P_5A9_M    : chr [1:195662] "5453091" "8854" "5716" "66359" ...
 $ P_10A14    : chr [1:195662] "10943540" "17295" "12224" "129717" ...
 $ P_10A14_F  : chr [1:195662] "5389280" "8061" "5423" "63637" ...
 $ P_10A14_M  : chr [1:195662] "5554260" "9234" "6801" "66080" ...
 $ P_15A19    : chr [1:195662] "10806690" "18303" "11484" "131967" ...
 $ P_15A19_F  : chr [1:195662] "5344540" "8138" "5140" "65064" ...
 $ P_15A19_M  : chr [1:195662] "5462150" "10165" "6344" "66903" ...
 $ P_20A24    : chr [1:195662] "10422095" "19981" "11770" "127020" ...
 $ P_20A24_F  : chr [1:195662] "5256211" "7832" "4711" "63866" ...
 $ P_20A24_M  : chr [1:195662] "5165884" "12149" "7059" "63154" ...
 $ P_25A29    : chr [1:195662] "9993001" "20584" "12238" "118426" ...
 $ P_25A29_F  : chr [1:195662] "5131597" "7125" "4427" "60285" ...
 $ P_25A29_M  : chr [1:195662] "4861404" "13459" "7811" "58141" ...
 $ P_30A34    : chr [1:195662] "9420827" "19601" "11315" "106825" ...
 $ P_30A34_F  : chr [1:195662] "4893101" "6309" "4074" "55174" ...
 $ P_30A34_M  : chr [1:195662] "4527726" "13292" "7241" "51651" ...
 $ P_35A39    : chr [1:195662] "9020276" "18645" "10357" "99257" ...
 $ P_35A39_F  : chr [1:195662] "4688746" "6289" "3825" "51483" ...
 $ P_35A39_M  : chr [1:195662] "4331530" "12356" "6532" "47774" ...
 $ P_40A44    : chr [1:195662] "8503586" "17934" "9705" "92378" ...
 $ P_40A44_F  : chr [1:195662] "4441282" "6060" "3743" "48539" ...
 $ P_40A44_M  : chr [1:195662] "4062304" "11874" "5962" "43839" ...
 $ P_45A49    : chr [1:195662] "7942413" "16840" "8668" "84669" ...
 $ P_45A49_F  : chr [1:195662] "4130069" "5940" "3377" "44506" ...
 $ P_45A49_M  : chr [1:195662] "3812344" "10900" "5291" "40163" ...
 $ P_50A54    : chr [1:195662] "7037532" "15070" "7878" "74121" ...
 $ P_50A54_F  : chr [1:195662] "3705369" "5481" "3239" "39510" ...
 $ P_50A54_M  : chr [1:195662] "3332163" "9589" "4639" "34611" ...
 $ P_55A59    : chr [1:195662] "5695958" "13070" "6838" "58865" ...
 $ P_55A59_F  : chr [1:195662] "3002982" "4728" "2823" "31257" ...
 $ P_55A59_M  : chr [1:195662] "2692976" "8342" "4015" "27608" ...
 $ P_60A64    : chr [1:195662] "4821062" "11181" "5997" "48306" ...
 $ P_60A64_F  : chr [1:195662] "2563200" "4050" "2511" "25871" ...
 $ P_60A64_M  : chr [1:195662] "2257862" "7131" "3486" "22435" ...
 $ P_65A69    : chr [1:195662] "3645077" "9160" "5052" "35823" ...
 $ P_65A69_F  : chr [1:195662] "1938227" "3343" "2130" "19125" ...
 $ P_65A69_M  : chr [1:195662] "1706850" "5817" "2922" "16698" ...
 $ P_70A74    : chr [1:195662] "2647340" "6903" "3852" "25586" ...
  [list output truncated]
 - attr(*, "spec")=
  .. cols(
  ..   ENTIDAD = col_character(),
  ..   NOM_ENT = col_character(),
  ..   MUN = col_character(),
  ..   NOM_MUN = col_character(),
  ..   LOC = col_character(),
  ..   NOM_LOC = col_character(),
  ..   LONGITUD = col_character(),
  ..   LATITUD = col_character(),
  ..   ALTITUD = col_character(),
  ..   POBTOT = col_double(),
  ..   POBFEM = col_character(),
  ..   POBMAS = col_character(),
  ..   P_0A2 = col_character(),
  ..   P_0A2_F = col_character(),
  ..   P_0A2_M = col_character(),
  ..   P_3YMAS = col_character(),
  ..   P_3YMAS_F = col_character(),
  ..   P_3YMAS_M = col_character(),
  ..   P_5YMAS = col_character(),
  ..   P_5YMAS_F = col_character(),
  ..   P_5YMAS_M = col_character(),
  ..   P_12YMAS = col_character(),
  ..   P_12YMAS_F = col_character(),
  ..   P_12YMAS_M = col_character(),
  ..   P_15YMAS = col_character(),
  ..   P_15YMAS_F = col_character(),
  ..   P_15YMAS_M = col_character(),
  ..   P_18YMAS = col_character(),
  ..   P_18YMAS_F = col_character(),
  ..   P_18YMAS_M = col_character(),
  ..   P_3A5 = col_character(),
  ..   P_3A5_F = col_character(),
  ..   P_3A5_M = col_character(),
  ..   P_6A11 = col_character(),
  ..   P_6A11_F = col_character(),
  ..   P_6A11_M = col_character(),
  ..   P_8A14 = col_character(),
  ..   P_8A14_F = col_character(),
  ..   P_8A14_M = col_character(),
  ..   P_12A14 = col_character(),
  ..   P_12A14_F = col_character(),
  ..   P_12A14_M = col_character(),
  ..   P_15A17 = col_character(),
  ..   P_15A17_F = col_character(),
  ..   P_15A17_M = col_character(),
  ..   P_18A24 = col_character(),
  ..   P_18A24_F = col_character(),
  ..   P_18A24_M = col_character(),
  ..   P_15A49_F = col_character(),
  ..   P_60YMAS = col_character(),
  ..   P_60YMAS_F = col_character(),
  ..   P_60YMAS_M = col_character(),
  ..   REL_H_M = col_character(),
  ..   POB0_14 = col_character(),
  ..   POB15_64 = col_character(),
  ..   POB65_MAS = col_character(),
  ..   P_0A4 = col_character(),
  ..   P_0A4_F = col_character(),
  ..   P_0A4_M = col_character(),
  ..   P_5A9 = col_character(),
  ..   P_5A9_F = col_character(),
  ..   P_5A9_M = col_character(),
  ..   P_10A14 = col_character(),
  ..   P_10A14_F = col_character(),
  ..   P_10A14_M = col_character(),
  ..   P_15A19 = col_character(),
  ..   P_15A19_F = col_character(),
  ..   P_15A19_M = col_character(),
  ..   P_20A24 = col_character(),
  ..   P_20A24_F = col_character(),
  ..   P_20A24_M = col_character(),
  ..   P_25A29 = col_character(),
  ..   P_25A29_F = col_character(),
  ..   P_25A29_M = col_character(),
  ..   P_30A34 = col_character(),
  ..   P_30A34_F = col_character(),
  ..   P_30A34_M = col_character(),
  ..   P_35A39 = col_character(),
  ..   P_35A39_F = col_character(),
  ..   P_35A39_M = col_character(),
  ..   P_40A44 = col_character(),
  ..   P_40A44_F = col_character(),
  ..   P_40A44_M = col_character(),
  ..   P_45A49 = col_character(),
  ..   P_45A49_F = col_character(),
  ..   P_45A49_M = col_character(),
  ..   P_50A54 = col_character(),
  ..   P_50A54_F = col_character(),
  ..   P_50A54_M = col_character(),
  ..   P_55A59 = col_character(),
  ..   P_55A59_F = col_character(),
  ..   P_55A59_M = col_character(),
  ..   P_60A64 = col_character(),
  ..   P_60A64_F = col_character(),
  ..   P_60A64_M = col_character(),
  ..   P_65A69 = col_character(),
  ..   P_65A69_F = col_character(),
  ..   P_65A69_M = col_character(),
  ..   P_70A74 = col_character(),
  ..   P_70A74_F = col_character(),
  ..   P_70A74_M = col_character(),
  ..   P_75A79 = col_character(),
  ..   P_75A79_F = col_character(),
  ..   P_75A79_M = col_character(),
  ..   P_80A84 = col_character(),
  ..   P_80A84_F = col_character(),
  ..   P_80A84_M = col_character(),
  ..   P_85YMAS = col_character(),
  ..   P_85YMAS_F = col_character(),
  ..   P_85YMAS_M = col_character(),
  ..   PROM_HNV = col_character(),
  ..   PNACENT = col_character(),
  ..   PNACENT_F = col_character(),
  ..   PNACENT_M = col_character(),
  ..   PNACOE = col_character(),
  ..   PNACOE_F = col_character(),
  ..   PNACOE_M = col_character(),
  ..   PRES2015 = col_character(),
  ..   PRES2015_F = col_character(),
  ..   PRES2015_M = col_character(),
  ..   PRESOE15 = col_character(),
  ..   PRESOE15_F = col_character(),
  ..   PRESOE15_M = col_character(),
  ..   P3YM_HLI = col_character(),
  ..   P3YM_HLI_F = col_character(),
  ..   P3YM_HLI_M = col_character(),
  ..   P3HLINHE = col_character(),
  ..   P3HLINHE_F = col_character(),
  ..   P3HLINHE_M = col_character(),
  ..   P3HLI_HE = col_character(),
  ..   P3HLI_HE_F = col_character(),
  ..   P3HLI_HE_M = col_character(),
  ..   P5_HLI = col_character(),
  ..   P5_HLI_NHE = col_character(),
  ..   P5_HLI_HE = col_character(),
  ..   PHOG_IND = col_character(),
  ..   POB_AFRO = col_character(),
  ..   POB_AFRO_F = col_character(),
  ..   POB_AFRO_M = col_character(),
  ..   PCON_DISC = col_character(),
  ..   PCDISC_MOT = col_character(),
  ..   PCDISC_VIS = col_character(),
  ..   PCDISC_LENG = col_character(),
  ..   PCDISC_AUD = col_character(),
  ..   PCDISC_MOT2 = col_character(),
  ..   PCDISC_MEN = col_character(),
  ..   PCON_LIMI = col_character(),
  ..   PCLIM_CSB = col_character(),
  ..   PCLIM_VIS = col_character(),
  ..   PCLIM_HACO = col_character(),
  ..   PCLIM_OAUD = col_character(),
  ..   PCLIM_MOT2 = col_character(),
  ..   PCLIM_RE_CO = col_character(),
  ..   PCLIM_PMEN = col_character(),
  ..   PSIND_LIM = col_character(),
  ..   P3A5_NOA = col_character(),
  ..   P3A5_NOA_F = col_character(),
  ..   P3A5_NOA_M = col_character(),
  ..   P6A11_NOA = col_character(),
  ..   P6A11_NOAF = col_character(),
  ..   P6A11_NOAM = col_character(),
  ..   P12A14NOA = col_character(),
  ..   P12A14NOAF = col_character(),
  ..   P12A14NOAM = col_character(),
  ..   P15A17A = col_character(),
  ..   P15A17A_F = col_character(),
  ..   P15A17A_M = col_character(),
  ..   P18A24A = col_character(),
  ..   P18A24A_F = col_character(),
  ..   P18A24A_M = col_character(),
  ..   P8A14AN = col_character(),
  ..   P8A14AN_F = col_character(),
  ..   P8A14AN_M = col_character(),
  ..   P15YM_AN = col_character(),
  ..   P15YM_AN_F = col_character(),
  ..   P15YM_AN_M = col_character(),
  ..   P15YM_SE = col_character(),
  ..   P15YM_SE_F = col_character(),
  ..   P15YM_SE_M = col_character(),
  ..   P15PRI_IN = col_character(),
  ..   P15PRI_INF = col_character(),
  ..   P15PRI_INM = col_character(),
  ..   P15PRI_CO = col_character(),
  ..   P15PRI_COF = col_character(),
  ..   P15PRI_COM = col_character(),
  ..   P15SEC_IN = col_character(),
  ..   P15SEC_INF = col_character(),
  ..   P15SEC_INM = col_character(),
  ..   P15SEC_CO = col_character(),
  ..   P15SEC_COF = col_character(),
  ..   P15SEC_COM = col_character(),
  ..   P18YM_PB = col_character(),
  ..   P18YM_PB_F = col_character(),
  ..   P18YM_PB_M = col_character(),
  ..   GRAPROES = col_character(),
  ..   GRAPROES_F = col_character(),
  ..   GRAPROES_M = col_character(),
  ..   PEA = col_character(),
  ..   PEA_F = col_character(),
  ..   PEA_M = col_character(),
  ..   PE_INAC = col_character(),
  ..   PE_INAC_F = col_character(),
  ..   PE_INAC_M = col_character(),
  ..   POCUPADA = col_character(),
  ..   POCUPADA_F = col_character(),
  ..   POCUPADA_M = col_character(),
  ..   PDESOCUP = col_character(),
  ..   PDESOCUP_F = col_character(),
  ..   PDESOCUP_M = col_character(),
  ..   PSINDER = col_character(),
  ..   PDER_SS = col_character(),
  ..   PDER_IMSS = col_character(),
  ..   PDER_ISTE = col_character(),
  ..   PDER_ISTEE = col_character(),
  ..   PAFIL_PDOM = col_character(),
  ..   PDER_SEGP = col_character(),
  ..   PDER_IMSSB = col_character(),
  ..   PAFIL_IPRIV = col_character(),
  ..   PAFIL_OTRAI = col_character(),
  ..   P12YM_SOLT = col_character(),
  ..   P12YM_CASA = col_character(),
  ..   P12YM_SEPA = col_character(),
  ..   PCATOLICA = col_character(),
  ..   PRO_CRIEVA = col_character(),
  ..   POTRAS_REL = col_character(),
  ..   PSIN_RELIG = col_character(),
  ..   TOTHOG = col_character(),
  ..   HOGJEF_F = col_character(),
  ..   HOGJEF_M = col_character(),
  ..   POBHOG = col_character(),
  ..   PHOGJEF_F = col_character(),
  ..   PHOGJEF_M = col_character(),
  ..   VIVTOT = col_double(),
  ..   TVIVHAB = col_double(),
  ..   TVIVPAR = col_character(),
  ..   VIVPAR_HAB = col_character(),
  ..   VIVPARH_CV = col_character(),
  ..   TVIVPARHAB = col_character(),
  ..   VIVPAR_DES = col_character(),
  ..   VIVPAR_UT = col_character(),
  ..   OCUPVIVPAR = col_character(),
  ..   PROM_OCUP = col_character(),
  ..   PRO_OCUP_C = col_character(),
  ..   VPH_PISODT = col_character(),
  ..   VPH_PISOTI = col_character(),
  ..   VPH_1DOR = col_character(),
  ..   VPH_2YMASD = col_character(),
  ..   VPH_1CUART = col_character(),
  ..   VPH_2CUART = col_character(),
  ..   VPH_3YMASC = col_character(),
  ..   VPH_C_ELEC = col_character(),
  ..   VPH_S_ELEC = col_character(),
  ..   VPH_AGUADV = col_character(),
  ..   VPH_AEASP = col_character(),
  ..   VPH_AGUAFV = col_character(),
  ..   VPH_TINACO = col_character(),
  ..   VPH_CISTER = col_character(),
  ..   VPH_EXCSA = col_character(),
  ..   VPH_LETR = col_character(),
  ..   VPH_DRENAJ = col_character(),
  ..   VPH_NODREN = col_character(),
  ..   VPH_C_SERV = col_character(),
  ..   VPH_NDEAED = col_character(),
  ..   VPH_DSADMA = col_character(),
  ..   VPH_NDACMM = col_character(),
  ..   VPH_SNBIEN = col_character(),
  ..   VPH_REFRI = col_character(),
  ..   VPH_LAVAD = col_character(),
  ..   VPH_HMICRO = col_character(),
  ..   VPH_AUTOM = col_character(),
  ..   VPH_MOTO = col_character(),
  ..   VPH_BICI = col_character(),
  ..   VPH_RADIO = col_character(),
  ..   VPH_TV = col_character(),
  ..   VPH_PC = col_character(),
  ..   VPH_TELEF = col_character(),
  ..   VPH_CEL = col_character(),
  ..   VPH_INTER = col_character(),
  ..   VPH_STVP = col_character(),
  ..   VPH_SPMVPI = col_character(),
  ..   VPH_CVJ = col_character(),
  ..   VPH_SINRTV = col_character(),
  ..   VPH_SINLTC = col_character(),
  ..   VPH_SINCINT = col_character(),
  ..   VPH_SINTIC = col_character(),
  ..   TAMLOC = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# The dictionary CSV has no usable header line. Drop the first three rows so
# that the row holding the real column titles comes first, and drop columns
# 7-10 up front: readr parsed them as logical, so they are presumably empty
# and would otherwise contribute NA column names.
clean_info_dict <- info_dict[-c(1:3), -c(7:10)]

# Promote the first remaining row to column names. It has to be flattened to
# a character vector: assigning a one-row tibble to names() is what tibble
# >= 3.0.0 warns about.
names(clean_info_dict) <- as.character(
  unlist(clean_info_dict[1, ], use.names = FALSE)
)
clean_info_dict <- clean_info_dict[-1, ]


clean_info_dict
# One row per distinct entity name found in the census table.
unique_states <- distinct(df, NOM_ENT)

write_csv(
  x = unique_states,
  file = here("data", "processed", "entity_names.csv")
)


unique_states
# Read the exported file back and extract its last (here: only) column as a
# plain vector.
entities_csv <- pull(
  read_csv(file = here("data", "processed", "entity_names.csv"))
)
Rows: 33 Columns: 1── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): NOM_ENT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the entity names that were read back from the exported CSV.
entities_csv
 [1] "Total nacional"                  "Aguascalientes"                  "Baja California"                 "Baja California Sur"            
 [5] "Campeche"                        "Coahuila de Zaragoza"            "Colima"                          "Chiapas"                        
 [9] "Chihuahua"                       "Ciudad de México"                "Durango"                         "Guanajuato"                     
[13] "Guerrero"                        "Hidalgo"                         "Jalisco"                         "México"                         
[17] "Michoacán de Ocampo"             "Morelos"                         "Nayarit"                         "Nuevo León"                     
[21] "Oaxaca"                          "Puebla"                          "Querétaro"                       "Quintana Roo"                   
[25] "San Luis Potosí"                 "Sinaloa"                         "Sonora"                          "Tabasco"                        
[29] "Tamaulipas"                      "Tlaxcala"                        "Veracruz de Ignacio de la Llave" "Yucatán"                        
[33] "Zacatecas"                      

Selecting the dictionary rows (variables) that we’ll analyze

# Row positions within clean_info_dict of the dictionary entries to keep.
rows_to_include <- c(
  1:12,
  53:132,
  136:140,
  147,
  155:211,
  220:232
)

# Take those dictionary rows and extract the 4th column, whose values are
# used below as column names of df.
filtered_data <- clean_info_dict |>
  slice(rows_to_include) |>
  pull(4)

filtered_data
  [1] "ENTIDAD"    "NOM_ENT"    "MUN"        "NOM_MUN"    "LOC"        "NOM_LOC"    "LONGITUD"   "LATITUD"    "ALTITUD"    "POBTOT"     "POBFEM"    
 [12] "POBMAS"     "REL_H_M"    "POB0_14"    "POB15_64"   "POB65_MAS"  "P_0A4"      "P_0A4_F"    "P_0A4_M"    "P_5A9"      "P_5A9_F"    "P_5A9_M"   
 [23] "P_10A14"    "P_10A14_F"  "P_10A14_M"  "P_15A19"    "P_15A19_F"  "P_15A19_M"  "P_20A24"    "P_20A24_F"  "P_20A24_M"  "P_25A29"    "P_25A29_F" 
 [34] "P_25A29_M"  "P_30A34"    "P_30A34_F"  "P_30A34_M"  "P_35A39"    "P_35A39_F"  "P_35A39_M"  "P_40A44"    "P_40A44_F"  "P_40A44_M"  "P_45A49"   
 [45] "P_45A49_F"  "P_45A49_M"  "P_50A54"    "P_50A54_F"  "P_50A54_M"  "P_55A59"    "P_55A59_F"  "P_55A59_M"  "P_60A64"    "P_60A64_F"  "P_60A64_M" 
 [56] "P_65A69"    "P_65A69_F"  "P_65A69_M"  "P_70A74"    "P_70A74_F"  "P_70A74_M"  "P_75A79"    "P_75A79_F"  "P_75A79_M"  "P_80A84"    "P_80A84_F" 
 [67] "P_80A84_M"  "P_85YMAS"   "P_85YMAS_F" "P_85YMAS_M" "PROM_HNV"   "PNACENT"    "PNACENT_F"  "PNACENT_M"  "PNACOE"     "PNACOE_F"   "PNACOE_M"  
 [78] "PRES2015"   "PRES2015_F" "PRES2015_M" "PRESOE15"   "PRESOE15_F" "PRESOE15_M" "P3YM_HLI"   "P3YM_HLI_F" "P3YM_HLI_M" "P3HLINHE"   "P3HLINHE_F"
 [89] "P3HLINHE_M" "P3HLI_HE"   "P3HLI_HE_F" "P3HLI_HE_M" "PHOG_IND"   "POB_AFRO"   "POB_AFRO_F" "POB_AFRO_M" "PCON_DISC"  "PCON_LIMI"  "PSIND_LIM" 
[100] "P3A5_NOA"   "P3A5_NOA_F" "P3A5_NOA_M" "P6A11_NOA"  "P6A11_NOAF" "P6A11_NOAM" "P12A14NOA"  "P12A14NOAF" "P12A14NOAM" "P15A17A"    "P15A17A_F" 
[111] "P15A17A_M"  "P18A24A"    "P18A24A_F"  "P18A24A_M"  "P8A14AN"    "P8A14AN_F"  "P8A14AN_M"  "P15YM_AN"   "P15YM_AN_F" "P15YM_AN_M" "P15YM_SE"  
[122] "P15YM_SE_F" "P15YM_SE_M" "P15PRI_IN"  "P15PRI_INF" "P15PRI_INM" "P15PRI_CO"  "P15PRI_COF" "P15PRI_COM" "P15SEC_IN"  "P15SEC_INF" "P15SEC_INM"
[133] "P15SEC_CO"  "P15SEC_COF" "P15SEC_COM" "P18YM_PB"   "P18YM_PB_F" "P18YM_PB_M" "GRAPROES"   "GRAPROES_F" "GRAPROES_M" "PEA"        "PEA_F"     
[144] "PEA_M"      "PE_INAC"    "PE_INAC_F"  "PE_INAC_M"  "POCUPADA"   "POCUPADA_F" "POCUPADA_M" "PDESOCUP"   "PDESOCUP_F" "PDESOCUP_M" "PSINDER"   
[155] "PDER_SS"    "P12YM_SOLT" "P12YM_CASA" "P12YM_SEPA" "PCATOLICA"  "PRO_CRIEVA" "POTRAS_REL" "PSIN_RELIG" "TOTHOG"     "HOGJEF_F"   "HOGJEF_M"  
[166] "POBHOG"     "PHOGJEF_F"  "PHOGJEF_M" 
# Keep only the census columns whose names were picked from the dictionary.
# all_of() marks filtered_data as an external character vector (using it bare
# inside select() is deprecated since tidyselect 1.1.0) and raises an error if
# any of the names is missing from df.
selected_df <- df |>
  select(all_of(filtered_data))

selected_df

EDA before exporting

# Inspect the structure of the full census table again before exporting.
str(object = df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ENTIDAD    : chr [1:195662] "00" "00" "00" "01" ...
 $ NOM_ENT    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
 $ MUN        : chr [1:195662] "000" "000" "000" "000" ...
 $ NOM_MUN    : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
 $ LOC        : chr [1:195662] "0000" "9998" "9999" "0000" ...
 $ NOM_LOC    : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
 $ LONGITUD   : chr [1:195662] NA NA NA NA ...
 $ LATITUD    : chr [1:195662] NA NA NA NA ...
 $ ALTITUD    : chr [1:195662] NA NA NA NA ...
 $ POBTOT     : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
 $ POBFEM     : chr [1:195662] "64540634" "96869" "61324" "728924" ...
 $ POBMAS     : chr [1:195662] "61473390" "153485" "85801" "696683" ...
 $ P_0A2      : chr [1:195662] "5764054" "10493" "6798" "71864" ...
 $ P_0A2_F    : chr [1:195662] "2848875" "5193" "3407" "35604" ...
 $ P_0A2_M    : chr [1:195662] "2915179" "5300" "3391" "36260" ...
 $ P_3YMAS    : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
 $ P_3YMAS_F  : chr [1:195662] "61554567" "91463" "57628" "692561" ...
 $ P_3YMAS_M  : chr [1:195662] "58422017" "147978" "82129" "659674" ...
 $ P_5YMAS    : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
 $ P_5YMAS_F  : chr [1:195662] "59433559" "87931" "55256" "666713" ...
 $ P_5YMAS_M  : chr [1:195662] "56259714" "144155" "79772" "632956" ...
 $ P_12YMAS   : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
 $ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
 $ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
 $ P_15YMAS   : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
 $ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
 $ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
 $ P_18YMAS   : chr [1:195662] "87492680" "186968" "104612" "960764" ...
 $ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
 $ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
 $ P_3A5      : chr [1:195662] "6462212" "10900" "7028" "78833" ...
 $ P_3A5_F    : chr [1:195662] "3193548" "5270" "3511" "38679" ...
 $ P_3A5_M    : chr [1:195662] "3268664" "5630" "3517" "40154" ...
 $ P_6A11     : chr [1:195662] "12986217" "20793" "13506" "156683" ...
 $ P_6A11_F   : chr [1:195662] "6398755" "10082" "6574" "77289" ...
 $ P_6A11_M   : chr [1:195662] "6587462" "10711" "6932" "79394" ...
 $ P_8A14     : chr [1:195662] "15287375" "24342" "16724" "181905" ...
 $ P_8A14_F   : chr [1:195662] "7531118" "11538" "7679" "89383" ...
 $ P_8A14_M   : chr [1:195662] "7756257" "12804" "9045" "92522" ...
 $ P_12A14    : chr [1:195662] "6542801" "10337" "7693" "77815" ...
 $ P_12A14_F  : chr [1:195662] "3229273" "4767" "3268" "38206" ...
 $ P_12A14_M  : chr [1:195662] "3313528" "5570" "4425" "39609" ...
 $ P_15A17    : chr [1:195662] "6492674" "10443" "6918" "78140" ...
 $ P_15A17_F  : chr [1:195662] "3202134" "4830" "3091" "38298" ...
 $ P_15A17_M  : chr [1:195662] "3290540" "5613" "3827" "39842" ...
 $ P_18A24    : chr [1:195662] "14736111" "27841" "16336" "180847" ...
 $ P_18A24_F  : chr [1:195662] "7398617" "11140" "6760" "90632" ...
 $ P_18A24_M  : chr [1:195662] "7337494" "16701" "9576" "90215" ...
 $ P_15A49_F  : chr [1:195662] "33885546" "47693" "29297" "388917" ...
 $ P_60YMAS   : chr [1:195662] "15142976" "37383" "21277" "145376" ...
 $ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
 $ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
 $ REL_H_M    : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
 $ POB0_14    : chr [1:195662] "31755284" "52523" "35025" "385195" ...
 $ POB15_64   : chr [1:195662] "83663440" "171209" "96250" "941834" ...
 $ POB65_MAS  : chr [1:195662] "10321914" "26202" "15280" "97070" ...
 $ P_0A4      : chr [1:195662] "10047365" "17848" "11527" "124430" ...
 $ P_0A4_F    : chr [1:195662] "4969883" "8725" "5779" "61452" ...
 $ P_0A4_M    : chr [1:195662] "5077482" "9123" "5748" "62978" ...
 $ P_5A9      : chr [1:195662] "10764379" "17380" "11274" "131048" ...
 $ P_5A9_F    : chr [1:195662] "5311288" "8526" "5558" "64689" ...
 $ P_5A9_M    : chr [1:195662] "5453091" "8854" "5716" "66359" ...
 $ P_10A14    : chr [1:195662] "10943540" "17295" "12224" "129717" ...
 $ P_10A14_F  : chr [1:195662] "5389280" "8061" "5423" "63637" ...
 $ P_10A14_M  : chr [1:195662] "5554260" "9234" "6801" "66080" ...
 $ P_15A19    : chr [1:195662] "10806690" "18303" "11484" "131967" ...
 $ P_15A19_F  : chr [1:195662] "5344540" "8138" "5140" "65064" ...
 $ P_15A19_M  : chr [1:195662] "5462150" "10165" "6344" "66903" ...
 $ P_20A24    : chr [1:195662] "10422095" "19981" "11770" "127020" ...
 $ P_20A24_F  : chr [1:195662] "5256211" "7832" "4711" "63866" ...
 $ P_20A24_M  : chr [1:195662] "5165884" "12149" "7059" "63154" ...
 $ P_25A29    : chr [1:195662] "9993001" "20584" "12238" "118426" ...
 $ P_25A29_F  : chr [1:195662] "5131597" "7125" "4427" "60285" ...
 $ P_25A29_M  : chr [1:195662] "4861404" "13459" "7811" "58141" ...
 $ P_30A34    : chr [1:195662] "9420827" "19601" "11315" "106825" ...
 $ P_30A34_F  : chr [1:195662] "4893101" "6309" "4074" "55174" ...
 $ P_30A34_M  : chr [1:195662] "4527726" "13292" "7241" "51651" ...
 $ P_35A39    : chr [1:195662] "9020276" "18645" "10357" "99257" ...
 $ P_35A39_F  : chr [1:195662] "4688746" "6289" "3825" "51483" ...
 $ P_35A39_M  : chr [1:195662] "4331530" "12356" "6532" "47774" ...
 $ P_40A44    : chr [1:195662] "8503586" "17934" "9705" "92378" ...
 $ P_40A44_F  : chr [1:195662] "4441282" "6060" "3743" "48539" ...
 $ P_40A44_M  : chr [1:195662] "4062304" "11874" "5962" "43839" ...
 $ P_45A49    : chr [1:195662] "7942413" "16840" "8668" "84669" ...
 $ P_45A49_F  : chr [1:195662] "4130069" "5940" "3377" "44506" ...
 $ P_45A49_M  : chr [1:195662] "3812344" "10900" "5291" "40163" ...
 $ P_50A54    : chr [1:195662] "7037532" "15070" "7878" "74121" ...
 $ P_50A54_F  : chr [1:195662] "3705369" "5481" "3239" "39510" ...
 $ P_50A54_M  : chr [1:195662] "3332163" "9589" "4639" "34611" ...
 $ P_55A59    : chr [1:195662] "5695958" "13070" "6838" "58865" ...
 $ P_55A59_F  : chr [1:195662] "3002982" "4728" "2823" "31257" ...
 $ P_55A59_M  : chr [1:195662] "2692976" "8342" "4015" "27608" ...
 $ P_60A64    : chr [1:195662] "4821062" "11181" "5997" "48306" ...
 $ P_60A64_F  : chr [1:195662] "2563200" "4050" "2511" "25871" ...
 $ P_60A64_M  : chr [1:195662] "2257862" "7131" "3486" "22435" ...
 $ P_65A69    : chr [1:195662] "3645077" "9160" "5052" "35823" ...
 $ P_65A69_F  : chr [1:195662] "1938227" "3343" "2130" "19125" ...
 $ P_65A69_M  : chr [1:195662] "1706850" "5817" "2922" "16698" ...
 $ P_70A74    : chr [1:195662] "2647340" "6903" "3852" "25586" ...
  [list output truncated]
 - attr(*, "spec")=
  .. cols(
  ..   ENTIDAD = col_character(),
  ..   NOM_ENT = col_character(),
  ..   MUN = col_character(),
  ..   NOM_MUN = col_character(),
  ..   LOC = col_character(),
  ..   NOM_LOC = col_character(),
  ..   LONGITUD = col_character(),
  ..   LATITUD = col_character(),
  ..   ALTITUD = col_character(),
  ..   POBTOT = col_double(),
  ..   POBFEM = col_character(),
  ..   POBMAS = col_character(),
  ..   P_0A2 = col_character(),
  ..   P_0A2_F = col_character(),
  ..   P_0A2_M = col_character(),
  ..   P_3YMAS = col_character(),
  ..   P_3YMAS_F = col_character(),
  ..   P_3YMAS_M = col_character(),
  ..   P_5YMAS = col_character(),
  ..   P_5YMAS_F = col_character(),
  ..   P_5YMAS_M = col_character(),
  ..   P_12YMAS = col_character(),
  ..   P_12YMAS_F = col_character(),
  ..   P_12YMAS_M = col_character(),
  ..   P_15YMAS = col_character(),
  ..   P_15YMAS_F = col_character(),
  ..   P_15YMAS_M = col_character(),
  ..   P_18YMAS = col_character(),
  ..   P_18YMAS_F = col_character(),
  ..   P_18YMAS_M = col_character(),
  ..   P_3A5 = col_character(),
  ..   P_3A5_F = col_character(),
  ..   P_3A5_M = col_character(),
  ..   P_6A11 = col_character(),
  ..   P_6A11_F = col_character(),
  ..   P_6A11_M = col_character(),
  ..   P_8A14 = col_character(),
  ..   P_8A14_F = col_character(),
  ..   P_8A14_M = col_character(),
  ..   P_12A14 = col_character(),
  ..   P_12A14_F = col_character(),
  ..   P_12A14_M = col_character(),
  ..   P_15A17 = col_character(),
  ..   P_15A17_F = col_character(),
  ..   P_15A17_M = col_character(),
  ..   P_18A24 = col_character(),
  ..   P_18A24_F = col_character(),
  ..   P_18A24_M = col_character(),
  ..   P_15A49_F = col_character(),
  ..   P_60YMAS = col_character(),
  ..   P_60YMAS_F = col_character(),
  ..   P_60YMAS_M = col_character(),
  ..   REL_H_M = col_character(),
  ..   POB0_14 = col_character(),
  ..   POB15_64 = col_character(),
  ..   POB65_MAS = col_character(),
  ..   P_0A4 = col_character(),
  ..   P_0A4_F = col_character(),
  ..   P_0A4_M = col_character(),
  ..   P_5A9 = col_character(),
  ..   P_5A9_F = col_character(),
  ..   P_5A9_M = col_character(),
  ..   P_10A14 = col_character(),
  ..   P_10A14_F = col_character(),
  ..   P_10A14_M = col_character(),
  ..   P_15A19 = col_character(),
  ..   P_15A19_F = col_character(),
  ..   P_15A19_M = col_character(),
  ..   P_20A24 = col_character(),
  ..   P_20A24_F = col_character(),
  ..   P_20A24_M = col_character(),
  ..   P_25A29 = col_character(),
  ..   P_25A29_F = col_character(),
  ..   P_25A29_M = col_character(),
  ..   P_30A34 = col_character(),
  ..   P_30A34_F = col_character(),
  ..   P_30A34_M = col_character(),
  ..   P_35A39 = col_character(),
  ..   P_35A39_F = col_character(),
  ..   P_35A39_M = col_character(),
  ..   P_40A44 = col_character(),
  ..   P_40A44_F = col_character(),
  ..   P_40A44_M = col_character(),
  ..   P_45A49 = col_character(),
  ..   P_45A49_F = col_character(),
  ..   P_45A49_M = col_character(),
  ..   P_50A54 = col_character(),
  ..   P_50A54_F = col_character(),
  ..   P_50A54_M = col_character(),
  ..   P_55A59 = col_character(),
  ..   P_55A59_F = col_character(),
  ..   P_55A59_M = col_character(),
  ..   P_60A64 = col_character(),
  ..   P_60A64_F = col_character(),
  ..   P_60A64_M = col_character(),
  ..   P_65A69 = col_character(),
  ..   P_65A69_F = col_character(),
  ..   P_65A69_M = col_character(),
  ..   P_70A74 = col_character(),
  ..   P_70A74_F = col_character(),
  ..   P_70A74_M = col_character(),
  ..   P_75A79 = col_character(),
  ..   P_75A79_F = col_character(),
  ..   P_75A79_M = col_character(),
  ..   P_80A84 = col_character(),
  ..   P_80A84_F = col_character(),
  ..   P_80A84_M = col_character(),
  ..   P_85YMAS = col_character(),
  ..   P_85YMAS_F = col_character(),
  ..   P_85YMAS_M = col_character(),
  ..   PROM_HNV = col_character(),
  ..   PNACENT = col_character(),
  ..   PNACENT_F = col_character(),
  ..   PNACENT_M = col_character(),
  ..   PNACOE = col_character(),
  ..   PNACOE_F = col_character(),
  ..   PNACOE_M = col_character(),
  ..   PRES2015 = col_character(),
  ..   PRES2015_F = col_character(),
  ..   PRES2015_M = col_character(),
  ..   PRESOE15 = col_character(),
  ..   PRESOE15_F = col_character(),
  ..   PRESOE15_M = col_character(),
  ..   P3YM_HLI = col_character(),
  ..   P3YM_HLI_F = col_character(),
  ..   P3YM_HLI_M = col_character(),
  ..   P3HLINHE = col_character(),
  ..   P3HLINHE_F = col_character(),
  ..   P3HLINHE_M = col_character(),
  ..   P3HLI_HE = col_character(),
  ..   P3HLI_HE_F = col_character(),
  ..   P3HLI_HE_M = col_character(),
  ..   P5_HLI = col_character(),
  ..   P5_HLI_NHE = col_character(),
  ..   P5_HLI_HE = col_character(),
  ..   PHOG_IND = col_character(),
  ..   POB_AFRO = col_character(),
  ..   POB_AFRO_F = col_character(),
  ..   POB_AFRO_M = col_character(),
  ..   PCON_DISC = col_character(),
  ..   PCDISC_MOT = col_character(),
  ..   PCDISC_VIS = col_character(),
  ..   PCDISC_LENG = col_character(),
  ..   PCDISC_AUD = col_character(),
  ..   PCDISC_MOT2 = col_character(),
  ..   PCDISC_MEN = col_character(),
  ..   PCON_LIMI = col_character(),
  ..   PCLIM_CSB = col_character(),
  ..   PCLIM_VIS = col_character(),
  ..   PCLIM_HACO = col_character(),
  ..   PCLIM_OAUD = col_character(),
  ..   PCLIM_MOT2 = col_character(),
  ..   PCLIM_RE_CO = col_character(),
  ..   PCLIM_PMEN = col_character(),
  ..   PSIND_LIM = col_character(),
  ..   P3A5_NOA = col_character(),
  ..   P3A5_NOA_F = col_character(),
  ..   P3A5_NOA_M = col_character(),
  ..   P6A11_NOA = col_character(),
  ..   P6A11_NOAF = col_character(),
  ..   P6A11_NOAM = col_character(),
  ..   P12A14NOA = col_character(),
  ..   P12A14NOAF = col_character(),
  ..   P12A14NOAM = col_character(),
  ..   P15A17A = col_character(),
  ..   P15A17A_F = col_character(),
  ..   P15A17A_M = col_character(),
  ..   P18A24A = col_character(),
  ..   P18A24A_F = col_character(),
  ..   P18A24A_M = col_character(),
  ..   P8A14AN = col_character(),
  ..   P8A14AN_F = col_character(),
  ..   P8A14AN_M = col_character(),
  ..   P15YM_AN = col_character(),
  ..   P15YM_AN_F = col_character(),
  ..   P15YM_AN_M = col_character(),
  ..   P15YM_SE = col_character(),
  ..   P15YM_SE_F = col_character(),
  ..   P15YM_SE_M = col_character(),
  ..   P15PRI_IN = col_character(),
  ..   P15PRI_INF = col_character(),
  ..   P15PRI_INM = col_character(),
  ..   P15PRI_CO = col_character(),
  ..   P15PRI_COF = col_character(),
  ..   P15PRI_COM = col_character(),
  ..   P15SEC_IN = col_character(),
  ..   P15SEC_INF = col_character(),
  ..   P15SEC_INM = col_character(),
  ..   P15SEC_CO = col_character(),
  ..   P15SEC_COF = col_character(),
  ..   P15SEC_COM = col_character(),
  ..   P18YM_PB = col_character(),
  ..   P18YM_PB_F = col_character(),
  ..   P18YM_PB_M = col_character(),
  ..   GRAPROES = col_character(),
  ..   GRAPROES_F = col_character(),
  ..   GRAPROES_M = col_character(),
  ..   PEA = col_character(),
  ..   PEA_F = col_character(),
  ..   PEA_M = col_character(),
  ..   PE_INAC = col_character(),
  ..   PE_INAC_F = col_character(),
  ..   PE_INAC_M = col_character(),
  ..   POCUPADA = col_character(),
  ..   POCUPADA_F = col_character(),
  ..   POCUPADA_M = col_character(),
  ..   PDESOCUP = col_character(),
  ..   PDESOCUP_F = col_character(),
  ..   PDESOCUP_M = col_character(),
  ..   PSINDER = col_character(),
  ..   PDER_SS = col_character(),
  ..   PDER_IMSS = col_character(),
  ..   PDER_ISTE = col_character(),
  ..   PDER_ISTEE = col_character(),
  ..   PAFIL_PDOM = col_character(),
  ..   PDER_SEGP = col_character(),
  ..   PDER_IMSSB = col_character(),
  ..   PAFIL_IPRIV = col_character(),
  ..   PAFIL_OTRAI = col_character(),
  ..   P12YM_SOLT = col_character(),
  ..   P12YM_CASA = col_character(),
  ..   P12YM_SEPA = col_character(),
  ..   PCATOLICA = col_character(),
  ..   PRO_CRIEVA = col_character(),
  ..   POTRAS_REL = col_character(),
  ..   PSIN_RELIG = col_character(),
  ..   TOTHOG = col_character(),
  ..   HOGJEF_F = col_character(),
  ..   HOGJEF_M = col_character(),
  ..   POBHOG = col_character(),
  ..   PHOGJEF_F = col_character(),
  ..   PHOGJEF_M = col_character(),
  ..   VIVTOT = col_double(),
  ..   TVIVHAB = col_double(),
  ..   TVIVPAR = col_character(),
  ..   VIVPAR_HAB = col_character(),
  ..   VIVPARH_CV = col_character(),
  ..   TVIVPARHAB = col_character(),
  ..   VIVPAR_DES = col_character(),
  ..   VIVPAR_UT = col_character(),
  ..   OCUPVIVPAR = col_character(),
  ..   PROM_OCUP = col_character(),
  ..   PRO_OCUP_C = col_character(),
  ..   VPH_PISODT = col_character(),
  ..   VPH_PISOTI = col_character(),
  ..   VPH_1DOR = col_character(),
  ..   VPH_2YMASD = col_character(),
  ..   VPH_1CUART = col_character(),
  ..   VPH_2CUART = col_character(),
  ..   VPH_3YMASC = col_character(),
  ..   VPH_C_ELEC = col_character(),
  ..   VPH_S_ELEC = col_character(),
  ..   VPH_AGUADV = col_character(),
  ..   VPH_AEASP = col_character(),
  ..   VPH_AGUAFV = col_character(),
  ..   VPH_TINACO = col_character(),
  ..   VPH_CISTER = col_character(),
  ..   VPH_EXCSA = col_character(),
  ..   VPH_LETR = col_character(),
  ..   VPH_DRENAJ = col_character(),
  ..   VPH_NODREN = col_character(),
  ..   VPH_C_SERV = col_character(),
  ..   VPH_NDEAED = col_character(),
  ..   VPH_DSADMA = col_character(),
  ..   VPH_NDACMM = col_character(),
  ..   VPH_SNBIEN = col_character(),
  ..   VPH_REFRI = col_character(),
  ..   VPH_LAVAD = col_character(),
  ..   VPH_HMICRO = col_character(),
  ..   VPH_AUTOM = col_character(),
  ..   VPH_MOTO = col_character(),
  ..   VPH_BICI = col_character(),
  ..   VPH_RADIO = col_character(),
  ..   VPH_TV = col_character(),
  ..   VPH_PC = col_character(),
  ..   VPH_TELEF = col_character(),
  ..   VPH_CEL = col_character(),
  ..   VPH_INTER = col_character(),
  ..   VPH_STVP = col_character(),
  ..   VPH_SPMVPI = col_character(),
  ..   VPH_CVJ = col_character(),
  ..   VPH_SINRTV = col_character(),
  ..   VPH_SINLTC = col_character(),
  ..   VPH_SINCINT = col_character(),
  ..   VPH_SINTIC = col_character(),
  ..   TAMLOC = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 

Exporting as parquet

# Convert the selected columns to an Arrow table and write them out as a
# dataset partitioned by NOM_ENT and ENTIDAD.
output_dir <- here("data", "processed", "parquet_data")

table <- arrow::Table$create(selected_df)

arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = c("NOM_ENT", "ENTIDAD"),
  existing_data_behavior = "overwrite"
)

Reading parquet

# Load the whole partitioned dataset back into memory and display it.
ds <- collect(open_dataset(here("data", "processed", "parquet_data")))

ds

Puebla

# Read back only the rows whose NOM_ENT is "Puebla".
ds_puebla <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Puebla"
  )
)

ds_puebla

Yucatán

# Read back only the rows whose NOM_ENT is "Yucatán".
ds_yucatan <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Yucatán"
  )
)

ds_yucatan

Nuevo León

# Read back only the rows whose NOM_ENT is "Nuevo León".
ds_nuevo_leon <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Nuevo León"
  )
)

ds_nuevo_leon

Total Nacional

# Read back only the rows whose NOM_ENT is "Total nacional".
ds_nacional <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Total nacional"
  )
)

ds_nacional

Verify datasets are not empty


# For every entity name, read its rows back from the parquet dataset and
# report either the row count or that nothing came back.
for (value in entities_csv) {
  read_dfs <- collect(
    filter(
      open_dataset(here("data", "processed", "parquet_data")),
      NOM_ENT == value
    )
  )

  if (nrow(read_dfs) > 0) {
    print(paste("OK", value, nrow(read_dfs)))
  } else {
    print(paste("Dataset is empty", value))
  }
}
[1] "OK Total nacional 3"
[1] "OK Aguascalientes 2058"
[1] "OK Baja California 5566"
[1] "OK Baja California Sur 2561"
[1] "OK Campeche 2800"
[1] "OK Coahuila de Zaragoza 4149"
[1] "OK Colima 1259"
[1] "OK Chiapas 21487"
[1] "OK Chihuahua 12389"
[1] "OK Ciudad de México 666"
[1] "OK Durango 6006"
[1] "OK Guanajuato 8945"
[1] "OK Guerrero 7001"
[1] "OK Hidalgo 4916"
[1] "OK Jalisco 10715"
[1] "OK México 5136"
[1] "OK Michoacán de Ocampo 8956"
[1] "OK Morelos 1678"
[1] "OK Nayarit 2913"
[1] "OK Nuevo León 4974"
[1] "OK Oaxaca 11856"
[1] "OK Puebla 7059"
[1] "OK Querétaro 2249"
[1] "OK Quintana Roo 2243"
[1] "OK San Luis Potosí 6729"
[1] "OK Sinaloa 5552"
[1] "OK Sonora 7500"
[1] "OK Tabasco 2517"
[1] "OK Tamaulipas 6695"
[1] "OK Tlaxcala 1323"
[1] "OK Veracruz de Ignacio de la Llave 20401"
[1] "OK Yucatán 2691"
[1] "OK Zacatecas 4669"

Coordinate Lab

# Pull the raw coordinate columns out of the selected data.
longitudes <- selected_df[["LONGITUD"]]
latitudes <- selected_df[["LATITUD"]]

# NOTE: despite its name, test_lat holds a longitude (8th element of
# `longitudes`); the name is kept because the next chunk reads it.
test_lat <- longitudes[8]
test_lat
[1] "102°17'45.768\" W"
# Split the degrees/minutes/seconds string on its separator characters and
# convert the first three pieces to numbers.
sections <- strsplit(test_lat, "[°'\" ]")[[1]]
degrees <- as.numeric(sections[1])
minutes <- as.numeric(sections[2])
seconds <- as.numeric(sections[3])

# Negate the result: the sample value carries a "W" suffix.
decimal_degrees <- -(degrees + minutes / 60 + seconds / 3600)
decimal_degrees
[1] -102.296
#' Convert degrees/minutes/seconds longitude strings to decimal degrees
#'
#' Accepts strings shaped like `102°17'45.768" W`. The function is
#' vectorised: each element of the input yields one element of the output.
#'
#' @param test_long Character vector (or `NA`) of longitudes in
#'   degrees°minutes'seconds" notation, optionally followed by a hemisphere
#'   letter.
#' @return A double vector the same length as `test_long`. Values are
#'   negative (west) unless the string ends in "E"; `NA` or empty input
#'   gives `NA_real_`.
longitude_to_decimal <- function(test_long) {
  coords <- as.character(test_long)
  pieces <- strsplit(coords, "[°'\" ]")

  # i-th piece of every split string as a number; a missing piece becomes NA.
  component <- function(i) {
    as.numeric(vapply(pieces, function(p) p[i], character(1)))
  }

  magnitude <- component(1) + component(2) / 60 + component(3) / 3600

  # West stays the default so strings without a hemisphere letter keep the
  # sign they had before; only an explicit trailing "E" flips it to positive.
  is_east <- grepl("E[[:space:]]*$", coords, ignore.case = TRUE)
  hemisphere_sign <- ifelse(is_east, 1, -1)

  hemisphere_sign * magnitude
}

#' Convert degrees/minutes/seconds latitude strings to decimal degrees
#'
#' Accepts strings in degrees°minutes'seconds" notation, optionally followed
#' by a hemisphere letter. The function is vectorised: each element of the
#' input yields one element of the output.
#'
#' @param test_lat Character vector (or `NA`) of latitudes in
#'   degrees°minutes'seconds" notation.
#' @return A double vector the same length as `test_lat`. Values are
#'   positive (north) unless the string ends in "S"; `NA` or empty input
#'   gives `NA_real_`.
latitude_to_decimal <- function(test_lat) {
  coords <- as.character(test_lat)
  pieces <- strsplit(coords, "[°'\" ]")

  # i-th piece of every split string as a number; a missing piece becomes NA.
  component <- function(i) {
    as.numeric(vapply(pieces, function(p) p[i], character(1)))
  }

  magnitude <- component(1) + component(2) / 60 + component(3) / 3600

  # North stays the default so strings without a hemisphere letter keep the
  # sign they had before; only an explicit trailing "S" flips it to negative.
  is_south <- grepl("S[[:space:]]*$", coords, ignore.case = TRUE)
  hemisphere_sign <- ifelse(is_south, -1, 1)

  hemisphere_sign * magnitude
}

Exporting clean

# Write the data with the added decimal coordinates to its own dataset
# directory, partitioned by NOM_ENT and ENTIDAD.
output_dir <- here("data", "processed", "parquet_data_coords")

table <- arrow::Table$create(selected_clean)

arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = c("NOM_ENT", "ENTIDAD"),
  existing_data_behavior = "overwrite"
)

Verify unique localities per state

# Per entity: is the locality name (NOM_LOC) alone enough to identify a row?
for (value in entities_csv) {
  read_dfs <- collect(
    filter(
      open_dataset(here("data", "processed", "parquet_data")),
      NOM_ENT == value
    )
  )

  if (nrow(read_dfs) != n_distinct(read_dfs$NOM_LOC)) {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>", n_distinct(read_dfs$NOM_LOC)
    ))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1772"
[1] "NOT OK Baja California 5566 <> 4621"
[1] "NOT OK Baja California Sur 2561 <> 1832"
[1] "NOT OK Campeche 2800 <> 1894"
[1] "NOT OK Coahuila de Zaragoza 4149 <> 3287"
[1] "NOT OK Colima 1259 <> 1035"
[1] "NOT OK Chiapas 21487 <> 10349"
[1] "NOT OK Chihuahua 12389 <> 8082"
[1] "NOT OK Ciudad de México 666 <> 617"
[1] "NOT OK Durango 6006 <> 4444"
[1] "NOT OK Guanajuato 8945 <> 6923"
[1] "NOT OK Guerrero 7001 <> 5189"
[1] "NOT OK Hidalgo 4916 <> 3690"
[1] "NOT OK Jalisco 10715 <> 6764"
[1] "NOT OK México 5136 <> 4291"
[1] "NOT OK Michoacán de Ocampo 8956 <> 6065"
[1] "NOT OK Morelos 1678 <> 1471"
[1] "NOT OK Nayarit 2913 <> 2243"
[1] "NOT OK Nuevo León 4974 <> 3328"
[1] "NOT OK Oaxaca 11856 <> 7924"
[1] "NOT OK Puebla 7059 <> 5037"
[1] "NOT OK Querétaro 2249 <> 1885"
[1] "NOT OK Quintana Roo 2243 <> 1832"
[1] "NOT OK San Luis Potosí 6729 <> 5037"
[1] "NOT OK Sinaloa 5552 <> 4064"
[1] "NOT OK Sonora 7500 <> 5710"
[1] "NOT OK Tabasco 2517 <> 2019"
[1] "NOT OK Tamaulipas 6695 <> 4601"
[1] "NOT OK Tlaxcala 1323 <> 1075"
[1] "NOT OK Veracruz de Ignacio de la Llave 20401 <> 12141"
[1] "NOT OK Yucatán 2691 <> 1790"
[1] "NOT OK Zacatecas 4669 <> 3594"
# Per entity: does municipality name + locality name identify a row?
for (value in entities_csv) {
  read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |>
    filter(NOM_ENT == value) |>
    collect() |>
    # Composite key built in R, after the rows have been collected.
    mutate(NOM_MUN_LOC = paste(NOM_MUN, NOM_LOC, sep = "_"))

  if (nrow(read_dfs) != n_distinct(read_dfs$NOM_MUN_LOC)) {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>", n_distinct(read_dfs$NOM_MUN_LOC)
    ))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1979"
[1] "NOT OK Baja California 5566 <> 5074"
[1] "NOT OK Baja California Sur 2561 <> 2212"
[1] "NOT OK Campeche 2800 <> 2368"
[1] "NOT OK Coahuila de Zaragoza 4149 <> 4023"
[1] "NOT OK Colima 1259 <> 1177"
[1] "NOT OK Chiapas 21487 <> 18268"
[1] "NOT OK Chihuahua 12389 <> 11167"
[1] "NOT OK Ciudad de México 666 <> 663"
[1] "NOT OK Durango 6006 <> 5578"
[1] "NOT OK Guanajuato 8945 <> 8753"
[1] "NOT OK Guerrero 7001 <> 6870"
[1] "NOT OK Hidalgo 4916 <> 4870"
[1] "NOT OK Jalisco 10715 <> 10393"
[1] "NOT OK México 5136 <> 5108"
[1] "NOT OK Michoacán de Ocampo 8956 <> 8656"
[1] "NOT OK Morelos 1678 <> 1662"
[1] "NOT OK Nayarit 2913 <> 2726"
[1] "NOT OK Nuevo León 4974 <> 4641"
[1] "NOT OK Oaxaca 11856 <> 11760"
[1] "NOT OK Puebla 7059 <> 6837"
[1] "NOT OK Querétaro 2249 <> 2222"
[1] "NOT OK Quintana Roo 2243 <> 2138"
[1] "NOT OK San Luis Potosí 6729 <> 6590"
[1] "NOT OK Sinaloa 5552 <> 5141"
[1] "NOT OK Sonora 7500 <> 7141"
[1] "NOT OK Tabasco 2517 <> 2399"
[1] "NOT OK Tamaulipas 6695 <> 6265"
[1] "NOT OK Tlaxcala 1323 <> 1315"
[1] "NOT OK Veracruz de Ignacio de la Llave 20401 <> 19225"
[1] "NOT OK Yucatán 2691 <> 2558"
[1] "NOT OK Zacatecas 4669 <> 4610"
# Per entity: does locality code + locality name identify a row?
for (value in entities_csv) {
  read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |>
    filter(NOM_ENT == value) |>
    collect() |>
    # Composite key built in R, after the rows have been collected.
    mutate(NOM_LOC_LOC = paste(LOC, NOM_LOC, sep = "_"))

  if (nrow(read_dfs) != n_distinct(read_dfs$NOM_LOC_LOC)) {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>", n_distinct(read_dfs$NOM_LOC_LOC)
    ))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 2026"
[1] "NOT OK Baja California 5566 <> 5548"
[1] "NOT OK Baja California Sur 2561 <> 2547"
[1] "NOT OK Campeche 2800 <> 2765"
[1] "NOT OK Coahuila de Zaragoza 4149 <> 4031"
[1] "NOT OK Colima 1259 <> 1230"
[1] "NOT OK Chiapas 21487 <> 20987"
[1] "NOT OK Chihuahua 12389 <> 12161"
[1] "NOT OK Ciudad de México 666 <> 638"
[1] "NOT OK Durango 6006 <> 5885"
[1] "NOT OK Guanajuato 8945 <> 8792"
[1] "NOT OK Guerrero 7001 <> 6740"
[1] "NOT OK Hidalgo 4916 <> 4655"
[1] "NOT OK Jalisco 10715 <> 10277"
[1] "NOT OK México 5136 <> 4878"
[1] "NOT OK Michoacán de Ocampo 8956 <> 8602"
[1] "NOT OK Morelos 1678 <> 1581"
[1] "NOT OK Nayarit 2913 <> 2852"
[1] "NOT OK Nuevo León 4974 <> 4819"
[1] "NOT OK Oaxaca 11856 <> 10515"
[1] "NOT OK Puebla 7059 <> 6511"
[1] "NOT OK Querétaro 2249 <> 2191"
[1] "NOT OK Quintana Roo 2243 <> 2211"
[1] "NOT OK San Luis Potosí 6729 <> 6519"
[1] "NOT OK Sinaloa 5552 <> 5499"
[1] "NOT OK Sonora 7500 <> 7293"
[1] "NOT OK Tabasco 2517 <> 2474"
[1] "NOT OK Tamaulipas 6695 <> 6551"
[1] "NOT OK Tlaxcala 1323 <> 1179"
[1] "NOT OK Veracruz de Ignacio de la Llave 20401 <> 19640"
[1] "NOT OK Yucatán 2691 <> 2430"
[1] "NOT OK Zacatecas 4669 <> 4488"
# Per entity: does locality code + municipality code identify a row?
for (value in entities_csv) {
  read_dfs <- open_dataset(here("data", "processed", "parquet_data")) |>
    filter(NOM_ENT == value) |>
    collect() |>
    # Composite key built in R, after the rows have been collected.
    mutate(LOC_MUN = paste(LOC, MUN, sep = "_"))

  if (nrow(read_dfs) != n_distinct(read_dfs$LOC_MUN)) {
    print(paste(
      "NOT OK", value, nrow(read_dfs), "<>", n_distinct(read_dfs$LOC_MUN)
    ))
  } else {
    print(paste("Localities Unique", value))
  }
}
[1] "Localities Unique Total nacional"
[1] "Localities Unique Aguascalientes"
[1] "Localities Unique Baja California"
[1] "Localities Unique Baja California Sur"
[1] "Localities Unique Campeche"
[1] "Localities Unique Coahuila de Zaragoza"
[1] "Localities Unique Colima"
[1] "Localities Unique Chiapas"
[1] "Localities Unique Chihuahua"
[1] "Localities Unique Ciudad de México"
[1] "Localities Unique Durango"
[1] "Localities Unique Guanajuato"
[1] "Localities Unique Guerrero"
[1] "Localities Unique Hidalgo"
[1] "Localities Unique Jalisco"
[1] "Localities Unique México"
[1] "Localities Unique Michoacán de Ocampo"
[1] "Localities Unique Morelos"
[1] "Localities Unique Nayarit"
[1] "Localities Unique Nuevo León"
[1] "Localities Unique Oaxaca"
[1] "Localities Unique Puebla"
[1] "Localities Unique Querétaro"
[1] "Localities Unique Quintana Roo"
[1] "Localities Unique San Luis Potosí"
[1] "Localities Unique Sinaloa"
[1] "Localities Unique Sonora"
[1] "Localities Unique Tabasco"
[1] "Localities Unique Tamaulipas"
[1] "Localities Unique Tlaxcala"
[1] "Localities Unique Veracruz de Ignacio de la Llave"
[1] "Localities Unique Yucatán"
[1] "Localities Unique Zacatecas"

Conclusion

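Append code to MUN and LOC (within each state, only the LOC + MUN code combination identifies a locality uniquely; names and other pairings repeat)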

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogTWFyY28gUG9sbyBCcmF2byBNb250aWVsDQpkYXRlOiAyMDIwLTA0LTIxDQotLS0NCg0KIyMjIExpYnJhcmllcw0KDQpgYGB7cn0NCiMgaWYgKCFyZXF1aXJlKCJyZW52IikpIGluc3RhbGwucGFja2FnZXMoInJlbnYiKQ0KIyBsaWJyYXJ5KHJlbnYpDQojIHJlbnY6OnJlc3RvcmUoKQ0KbGlicmFyeShoZXJlKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KGFycm93KQ0KYGBgDQoNCiMjIyBSZWFkIGRhdGENCg0KYGBge3J9DQp6aXBfZmlsZSA8LSBoZXJlKCJkYXRhIiwgInJhdyIsICJpdGVyXzAwX2NwdjIwMjBfY3N2LnppcCIpDQpgYGANCg0KYGBge3J9DQp0ZW1wX2RpciA8LSBoZXJlKCJ0ZW1wIikNCmRpci5jcmVhdGUodGVtcF9kaXIsIHNob3dXYXJuaW5ncyA9IEZBTFNFKQ0KDQp1bnppcCh6aXBfZmlsZSwgZmlsZXMgPSBjKCJpdGVyXzAwX2NwdjIwMjAvY29uanVudG9fZGVfZGF0b3MvY29uanVudG9fZGVfZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIsICJpdGVyXzAwX2NwdjIwMjAvZGljY2lvbmFyaW9fZGF0b3MvZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpLCBleGRpciA9IHRlbXBfZGlyKQ0KYGBgDQoNCmBgYHtyfQ0KDQpkYXRhX3BhdGggPC0gaGVyZSh0ZW1wX2RpciwNCiAgICAgICAgICAgICAgICAgIml0ZXJfMDBfY3B2MjAyMCIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvcyIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvc19pdGVyXzAwQ1NWMjAuY3N2IikNCg0KZGljdF9wYXRoIDwtIGhlcmUodGVtcF9kaXIsDQogICAgICAgICAgICAgICAgICJpdGVyXzAwX2NwdjIwMjAiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3MiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpDQoNCmluZm9fZGljdCA8LSByZWFkX2NzdihkaWN0X3BhdGgpDQpkZiA8LSByZWFkX2NzdihkYXRhX3BhdGgpDQoNCg0KdW5saW5rKHRlbXBfZGlyLCByZWN1cnNpdmUgPSBUUlVFKQ0KYGBgDQoNCmBgYHtyfQ0KIyBFeHBvcnRpbmcgZGljdGlvbmFyeSBmaWxlDQp3cml0ZV9jc3YoaW5mb19kaWN0LA0KICAgICAgICAgIGhlcmUoImRhdGEiLCAicmF3IiwgImRpY2Npb25hcmlvX2RhdG9zX2l0ZXJfMDBDU1YyMC5jc3YiKSkNCg0KYGBgDQoNCiMjIyBFeHBsb3JhdGlvbg0KDQpgYGB7cn0NCmhlYWQoZGYpDQpoZWFkKGluZm9fZGljdCkNCmBgYA0KDQpgYGB7cn0NCnN0cihkZikNCmBgYA0KDQpgYGB7cn0NCmNsZWFuX2luZm9fZGljdCA8LSBpbmZvX2RpY3RbLWMoMTozKSwgXQ0KbmFtZXMoY2xlYW5faW5mb19kaWN0KSA8LSBjbGVhbl9pbmZvX2RpY3RbMSwgXQ0KY2xlYW5faW5mb19kaWN0IDwtIGNsZWFuX2luZm9fZGljdFstMSxdDQpjbGVhbl9pbmZvX2RpY3QgPC0gY2xlYW5faW5mb19kaWN0Wywg
LWMoNzoxMCldDQoNCg0KY2xlYW5faW5mb19kaWN0DQpgYGANCg0KYGBge3J9DQp1bmlxdWVfc3RhdGVzIDwtIGRmIHw+IA0KICAgICAgZGlzdGluY3QoTk9NX0VOVCkNCg0Kd3JpdGVfY3N2KHVuaXF1ZV9zdGF0ZXMsIGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgImVudGl0eV9uYW1lcy5jc3YiKSkNCg0KDQp1bmlxdWVfc3RhdGVzDQpgYGANCg0KYGBge3J9DQplbnRpdGllc19jc3YgPC0gcmVhZF9jc3YoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAiZW50aXR5X25hbWVzLmNzdiIpKSB8PiBwdWxsKCkNCg0KcHJpbnQoZW50aXRpZXNfY3N2KQ0KYGBgDQoNCiMjIyBTZWxlY3Rpbmcgcm93cyB0aGF0IHdlJ2xsIGFuYWx5emUNCg0KYGBge3J9DQpyb3dzX3RvX2luY2x1ZGUgPC0gYygxOjEyLCA1MzoxMzIsIDEzNjoxNDAsIDE0NywgMTU1OjIxMSwgMjIwOjIzMikNCg0KZmlsdGVyZWRfZGF0YSA8LSBjbGVhbl9pbmZvX2RpY3QgfD4gDQogICAgICBmaWx0ZXIocm93X251bWJlcigpICVpbiUgcm93c190b19pbmNsdWRlKSB8PiANCiAgICAgIHB1bGwoNCkNCg0KZmlsdGVyZWRfZGF0YQ0KYGBgDQoNCmBgYHtyfQ0Kc2VsZWN0ZWRfZGYgPC0gZGYgfD4gDQogICAgICBzZWxlY3QoZmlsdGVyZWRfZGF0YSkNCg0Kc2VsZWN0ZWRfZGYNCmBgYA0KDQojIyMgRURBIGJlZm9yZSBleHBvcnRpbmcNCg0KYGBge3J9DQpzdHIoZGYpDQpgYGANCg0KIyMjIEV4cG9ydGluZyBhcyBwYXJxdWV0DQoNCmBgYHtyfQ0KIyBFeHBvcnQgd3JhbmdsZWQgZGF0YSBhcyBwYXJxdWV0IGZpbGUNCnRhYmxlIDwtIGFycm93OjpUYWJsZSRjcmVhdGUoc2VsZWN0ZWRfZGYpDQoNCm91dHB1dF9kaXIgPC0gaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikNCg0KYXJyb3c6OndyaXRlX2RhdGFzZXQodGFibGUsIG91dHB1dF9kaXIsIHBhcnRpdGlvbmluZyA9IGMoIk5PTV9FTlQiLCAiRU5USURBRCIpLCBleGlzdGluZ19kYXRhX2JlaGF2aW9yID0gIm92ZXJ3cml0ZSIpDQpgYGANCg0KIyMjIFJlYWRpbmcgcGFycXVldA0KDQpgYGB7cn0NCmRzIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4gDQogICAgICAgIGNvbGxlY3QoKQ0KDQpkcw0KYGBgDQoNCiMjIyBQdWVibGENCg0KYGBge3J9DQpkc19wdWVibGEgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iUHVlYmxhIikgfD4gDQogICAgY29sbGVjdCgpDQoNCmRzX3B1ZWJsYQ0KYGBgDQoNCiMjIyBZdWNhdMOhbg0KDQpgYGB7cn0NCmRzX3l1Y2F0YW4gPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iWXVjYXTDoW4iKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfeXVjYXRhbg0KYGBgDQoNCiMjIyBOdWV2byBMZcOzbg0KDQpgYGB7cn0N
CmRzX251ZXZvX2xlb24gPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iTnVldm8gTGXDs24iKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfbnVldm9fbGVvbg0KYGBgDQoNCiMjIyBUb3RhbCBOYWNpb25hbA0KDQpgYGB7cn0NCmRzX25hY2lvbmFsIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4NCiAgICBmaWx0ZXIoTk9NX0VOVD09IlRvdGFsIG5hY2lvbmFsIikgfD4gDQogICAgY29sbGVjdCgpDQoNCmRzX25hY2lvbmFsDQpgYGANCg0KIyMjIFZlcmlmeSBkYXRhc2V0cyBhcmUgbm90IGVtcHR5DQoNCmBgYHtyfQ0KDQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSAwKSB7DQogICAgcHJpbnQocGFzdGUoIkRhdGFzZXQgaXMgZW1wdHkiLCB2YWx1ZSkpDQogIH0gZWxzZSB7DQogICAgICAgIHByaW50KHBhc3RlKCJPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg0KIyMjIENvb3JkaW5hdGUgTGFiDQoNCmBgYHtyfQ0KbG9uZ2l0dWRlcyA8LSBzZWxlY3RlZF9kZiRMT05HSVRVRA0KbGF0aXR1ZGVzIDwtIHNlbGVjdGVkX2RmJExBVElUVUQNCmBgYA0KDQpgYGB7cn0NCnRlc3RfbG9uZyA8LSBsb25naXR1ZGVzWzhdDQp0ZXN0X2xvbmcNCmBgYA0KDQpgYGB7cn0NCnNlY3Rpb25zIDwtIHVubGlzdChzdHJzcGxpdCh0ZXN0X2xvbmcsICJbwrAnXCIgXSIpKQ0KZGVncmVlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzFdKQ0KbWludXRlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzJdKQ0Kc2Vjb25kcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzNdKQ0KDQpkZWNpbWFsX2RlZ3JlZXMgPC0gKGRlZ3JlZXMgKyBtaW51dGVzIC8gNjAgKyBzZWNvbmRzIC8gMzYwMCkgKiAtMQ0KZGVjaW1hbF9kZWdyZWVzDQpgYGANCg0KYGBge3J9DQpsb25naXR1ZGVfdG9fZGVjaW1hbCA8LSBmdW5jdGlvbih0ZXN0X2xvbmcpIHsNCiAgICBpZiAoaXMubmEodGVzdF9sb25nKSkgew0KICAgIHJldHVybihOQSkgIA0KICAgIH0NCiAgDQogIHNlY3Rpb25zIDwtIHVubGlzdChzdHJzcGxpdCh0ZXN0X2xvbmcsICJbwrAnXCIgXSIpKQ0KICANCiAgZGVncmVlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzFdKQ0KICBtaW51dGVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMl0pDQogIHNlY29uZHMgPC0gYXMubnVtZXJpYyhzZWN0aW9uc1szXSkNCg0KICANCiAgZGVjaW1hbF9kZWdyZWVzIDwtIChkZWdyZWVzICsgbWludXRlcyAvIDYwICsgc2Vjb25kcyAvIDM2MDApICogLTENCn0NCg0KbGF0aXR1ZGVf
dG9fZGVjaW1hbCA8LSBmdW5jdGlvbih0ZXN0X2xhdCkgew0KICAgIGlmIChpcy5uYSh0ZXN0X2xhdCkpIHsNCiAgICByZXR1cm4oTkEpICANCiAgICB9DQogIA0KICBzZWN0aW9ucyA8LSB1bmxpc3Qoc3Ryc3BsaXQodGVzdF9sYXQsICJbwrAnXCIgXSIpKQ0KICANCiAgZGVncmVlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzFdKQ0KICBtaW51dGVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMl0pDQogIHNlY29uZHMgPC0gYXMubnVtZXJpYyhzZWN0aW9uc1szXSkNCg0KICANCiAgZGVjaW1hbF9kZWdyZWVzIDwtIChkZWdyZWVzICsgbWludXRlcyAvIDYwICsgc2Vjb25kcyAvIDM2MDApDQp9DQpgYGANCg0KYGBge3J9DQpzZWxlY3RlZF9jbGVhbiA8LSBzZWxlY3RlZF9kZiB8PiANCiAgICAgIG11dGF0ZShsb25naXR1ZGVfZGVjaW1hbCA9IHNhcHBseShMT05HSVRVRCwgbG9uZ2l0dWRlX3RvX2RlY2ltYWwpLA0KICAgICAgICAgICAgIGxhdGl0dWRlX2RlY2ltYWwgPSBzYXBwbHkoTEFUSVRVRCwgbGF0aXR1ZGVfdG9fZGVjaW1hbCkpDQoNCnNlbGVjdGVkX2NsZWFuDQpgYGANCg0KIyMjIEV4cG9ydGluZyBjbGVhbg0KDQpgYGB7cn0NCnRhYmxlIDwtIGFycm93OjpUYWJsZSRjcmVhdGUoc2VsZWN0ZWRfY2xlYW4pDQoNCm91dHB1dF9kaXIgPC0gaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhX2Nvb3JkcyIpDQoNCmFycm93Ojp3cml0ZV9kYXRhc2V0KHRhYmxlLCBvdXRwdXRfZGlyLCBwYXJ0aXRpb25pbmcgPSBjKCJOT01fRU5UIiwgIkVOVElEQUQiKSwgZXhpc3RpbmdfZGF0YV9iZWhhdmlvciA9ICJvdmVyd3JpdGUiKQ0KYGBgDQoNCiMjIyBWZXJpZnkgdW5pcXVlIGNpdGllcyBwZXIgU3RhdGUNCg0KYGBge3J9DQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9MT0MpKSkgew0KICAgIHByaW50KHBhc3RlKCJMb2NhbGl0aWVzIFVuaXF1ZSIsIHZhbHVlKSkNCiAgfSBlbHNlIHsNCiAgICAgICAgcHJpbnQocGFzdGUoIk5PVCBPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSwgIjw+IiwgbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyROT01fTE9DKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCmBgYHtyfQ0KZm9yKHZhbHVlIGluIGVudGl0aWVzX2Nzdikgew0KICANCiAgcmVhZF9kZnMgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT12YWx1ZSkgfD4NCiAgICBjb2xsZWN0KCkNCiAgDQogICAgcmVhZF9kZnMkTk9NX01VTl9MT0MgPC0gcGFzdGUocmVhZF9kZnMkTk9NX01VTiwgcmVhZF9kZnMkTk9NX0xPQywg
c2VwID0gIl8iKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9NVU5fTE9DKSkpIHsNCiAgICBwcmludChwYXN0ZSgiTG9jYWxpdGllcyBVbmlxdWUiLCB2YWx1ZSkpDQogIH0gZWxzZSB7DQogICAgICAgIHByaW50KHBhc3RlKCJOT1QgT0siLCB2YWx1ZSwgbnJvdyhyZWFkX2RmcyksICI8PiIsIGxlbmd0aCh1bmlxdWUocmVhZF9kZnMkTk9NX01VTl9MT0MpKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg0KYGBge3J9DQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KICANCiAgICByZWFkX2RmcyROT01fTE9DX0xPQyA8LSBwYXN0ZShyZWFkX2RmcyRMT0MsIHJlYWRfZGZzJE5PTV9MT0MsIHNlcCA9ICJfIikNCg0KICBpZiAobnJvdyhyZWFkX2RmcykgPT0gbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyROT01fTE9DX0xPQykpKSB7DQogICAgcHJpbnQocGFzdGUoIkxvY2FsaXRpZXMgVW5pcXVlIiwgdmFsdWUpKQ0KICB9IGVsc2Ugew0KICAgICAgICBwcmludChwYXN0ZSgiTk9UIE9LIiwgdmFsdWUsIG5yb3cocmVhZF9kZnMpLCAiPD4iLCBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9MT0NfTE9DKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCmBgYHtyfQ0KZm9yKHZhbHVlIGluIGVudGl0aWVzX2Nzdikgew0KICANCiAgcmVhZF9kZnMgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT12YWx1ZSkgfD4NCiAgICBjb2xsZWN0KCkNCiAgDQogICAgcmVhZF9kZnMkTE9DX01VTiA8LSBwYXN0ZShyZWFkX2RmcyRMT0MsIHJlYWRfZGZzJE1VTiwgc2VwID0gIl8iKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJExPQ19NVU4pKSkgew0KICAgIHByaW50KHBhc3RlKCJMb2NhbGl0aWVzIFVuaXF1ZSIsIHZhbHVlKSkNCiAgfSBlbHNlIHsNCiAgICAgICAgcHJpbnQocGFzdGUoIk5PVCBPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSwgIjw+IiwgbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyRMT0NfTVVOKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCiMjIyBDb25jbHVzaW9uDQoNCiMjIyMgQXBwZW5kIGNvZGUgdG8gTVVOIGFuZCBMT0MNCg==